Intro to Urban Analytics

Bon Woo Koo & Subhrajit Guhathakurta

2022-08-20

Environment Setting

# Import required packages
library(tidyverse)
library(tmap)
library(ggplot2)
library(units)
library(sf)
library(leaflet)
library(tidycensus)
library(leafsync)
library(dbscan)
library(sfnetworks)
library(tigris)
library(tidygraph)
library(plotly)

wd <- file.path(Sys.getenv('setwd'),"work/working/School/UA_2022/external/Lab/module_2")
setwd(eval(wd))

What is Open Street Map

https://journal.r-project.org/archive/2013/RJ-2013-005/RJ-2013-005.pdf

https://r-spatial.org/r/2019/09/26/spatial-networks.html

https://cran.r-project.org/web/packages/dodgr/vignettes/dodgr.html

library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
# Get bounding box coordinates for Atlanta
bb <- getbb('Atlanta, GA')

# Converting bb into an sf object
bb_sf <- bb %>% t %>% data.frame() %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326) %>% 
  st_bbox() %>% 
  st_as_sfc()

## Plot-- 
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(bb_sf) + tm_borders()

OSM wiki provides a detailed description on various ‘key-value’ pairs. To download all possible key:value pairs, you can insert available_tags("highway") instead of manually specifying a list of values. One caveat is that, using all available tags will generate a large data, significantly slowing down the processing speed.

# Get OSM road data
osm_road <- opq(bbox = bb) %>%
  add_osm_feature(key = 'highway', 
                  value = c("motorway", "trunk", "primary", 
                            "secondary", "tertiary", "unclassified",
                            "residential")) %>%
  osmdata_sf() %>% 
  osm_poly2line()

names(osm_road)
## [1] "bbox"              "overpass_call"     "meta"             
## [4] "osm_points"        "osm_lines"         "osm_polygons"     
## [7] "osm_multilines"    "osm_multipolygons"
## Plot--
tmap_mode('plot')
## tmap mode set to plotting
tm_shape(osm_road$osm_lines) + tm_lines(col = "highway")

# Breakdown of highway types
round( prop.table(table(osm_road$osm_lines$highway)) * 100, 1 )
## 
##      motorway motorway_link       primary   residential     secondary 
##           3.7           0.0           3.7          68.4          11.6 
##      tertiary         trunk  unclassified 
##           8.6           1.5           2.5

Defining a custom bounding box

Displaying (and some computations) the entire network we just downloaded can slow down R environment. For class exercise, we will do the calculation for the whole network but limit the display to a smaller area. For that, we will create a custom bbox using coordinates of our selection. This technique of generating your own bounding box can also be useful if you have a specific area of interest that doesn’t overlap well with commonly used boundaries.

You need to define two points for bounding box, one point at the lower left corner and the other at the upper right corner. You can go to Google Maps, right-click on a point on map, and copy the XY coordinate. Let’s store them in p1 and p2.

# p1 is lower left corner, p2 is the upper right corner
p1 <- c(33.746217847959734, -84.40851957882589)
p2 <- c(33.785889694219634, -84.36354430149285)

You will then need to format the two coordinates into the same format as bb, the bounding box object we created above. Then, let’s convert it into sf object.

# Custom BB
my_bb <- matrix(c(p1[2], p1[1],
                  p2[2], p2[1]), ncol = 2)

colnames(my_bb) <- c("min", "max")
rownames(my_bb) <- c("x", "y")

# Custom BB to sf
my_bb_sf <- my_bb %>% t %>% data.frame() %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326) %>% 
  st_bbox() %>% 
  st_as_sfc()

## Plot--
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(bb_sf) + tm_borders(col = "black") +  # Black = larger bbox
  tm_shape(my_bb_sf) + tm_borders(col = "red")  # Red = smaller bbox

Converting OSM data into a graph

# Extract a smaller network for exercise purpose
osm_small <- osm_road$osm_lines[my_bb_sf,]

# Converting the line component of OSM data into a graph (using a projected coordinate system for filtering below)
net <- sfnetworks::as_sfnetwork(osm_small, directed = FALSE) 
print(net)
## # A sfnetwork with 1668 nodes and 1609 edges
## #
## # CRS:  EPSG:4326 
## #
## # An undirected multigraph with 246 components with spatially explicit edges
## #
## # Node Data:     1,668 × 1 (active)
## # Geometry type: POINT
## # Dimension:     XY
## # Bounding box:  xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
##               geometry
##            <POINT [°]>
## 1 (-84.39738 33.76535)
## 2 (-84.39613 33.76534)
## 3 (-84.40845 33.77004)
## 4 (-84.40837 33.77255)
## 5   (-84.3786 33.7669)
## 6 (-84.37793 33.76562)
## # … with 1,662 more rows
## #
## # Edge Data:     1,609 × 239
## # Geometry type: LINESTRING
## # Dimension:     XY
## # Bounding box:  xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
##    from    to osm_id name  abando… abando… access access… addr.c… addr.c…
##   <int> <int> <chr>  <chr> <chr>   <chr>   <chr>  <chr>   <chr>   <chr>  
## 1     1     2 92346… Mill… <NA>    <NA>    <NA>   <NA>    <NA>    <NA>   
## 2     3     4 92346… Trav… <NA>    <NA>    <NA>   <NA>    <NA>    <NA>   
## 3     5     6 92354… Rena… <NA>    <NA>    priva… <NA>    <NA>    <NA>   
## # … with 1,606 more rows, and 229 more variables: addr.housenumber <chr>,
## #   addr.postcode <chr>, addr.state <chr>, addr.street <chr>, alt_name <chr>,
## #   alt_name_1 <chr>, attribution <chr>, bicycle <chr>,
## #   bicycle.lanes.backward <chr>, bicycle.lanes.forward <chr>, bridge <chr>,
## #   bridge.name <chr>, bus <chr>, bus.lanes <chr>, center_turn_lane <chr>,
## #   centre_turn_lane <chr>, change <chr>, change.backward <chr>,
## #   change.forward <chr>, change.lanes <chr>, change.lanes.backward <chr>,
## #   change.lanes.forward <chr>, check_date <chr>, check_date.surface <chr>,
## #   class.bicycle <chr>, construction <chr>, covered <chr>, created_by <chr>,
## #   cutting <chr>, cycleway <chr>, cycleway.both <chr>,
## #   cycleway.est_width <chr>, cycleway.left <chr>, cycleway.right <chr>,
## #   description <chr>, destination <chr>, destination.backward <chr>,
## #   destination.forward <chr>, destination.lanes <chr>,
## #   destination.lanes.forward <chr>, destination.ref <chr>,
## #   destination.ref.backward <chr>, destination.ref.forward <chr>,
## #   destination.ref.lanes <chr>, destination.ref.lanes.forward <chr>,
## #   destination.ref.to <chr>, destination.ref.to.backward <chr>,
## #   destination.ref.to.lanes <chr>, destination.symbol <chr>,
## #   embedded_rails <chr>, expressway <chr>, fee <chr>, fixme <chr>,
## #   FIXME <chr>, foot <chr>, ford <chr>, gatech.PRKG_NUM <chr>, goods <chr>,
## #   HFCS <chr>, hgv <chr>, hgv.lanes <chr>, hgv.minaxles <chr>,
## #   hgv.national_network <chr>, highway <chr>, highway_1 <chr>, horse <chr>,
## #   hov <chr>, hov.lanes <chr>, incline <chr>, is_in.city <chr>,
## #   junction <chr>, junction.ref <chr>, LandPro08.DE3 <chr>,
## #   LandPro08.DE5 <chr>, lane_markings <chr>, lanes <chr>,
## #   lanes.backward <chr>, lanes.both_ways <chr>, lanes.forward <chr>,
## #   layer <chr>, level <chr>, lit <chr>, loc_name <chr>, maxheight <chr>,
## #   maxheight.lane <chr>, maxlength <chr>, maxspeed <chr>,
## #   maxspeed.advisory <chr>, maxspeed.type <chr>, maxspeed.variable <chr>,
## #   maxweight <chr>, maxweight.signed <chr>, minspeed <chr>,
## #   motor_vehicle <chr>, motor_vehicle.conditional <chr>,
## #   motor_vehicle.lanes <chr>, motorcycle <chr>, motorcycle.lanes <chr>,
## #   name.etymology.wikidata <chr>, name_ <chr>, …
## Plot--
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = net %>% st_as_sf('nodes'), fillColor = 'yellow', 
               stroke = F, radius = 20, fillOpacity = 0.7) 

Cleaning network

Simplifying network

This content is heavily based on this sfnetwork tutorial

Multiple edges can connect the same pair of nodes, called multiple edges. There can also be loops that starts and ends at the same node (e.g., think of a cul-de-sac). The former case can be detected using edge_is_multiple() and the latter using edge_is_loop().

When removing a set of multiple edges using functions shown below, they always keep the first edge and discard others. By arranging the order of edges for each set of multiple edges, you can specify which edge you want to preserve.

This way of simplification means that, when the multiple edges within a set have different attributes, all attribute information except for the preserved one would be lost. In such cases, you can merge those edges. Then, the output would have the geometry of the first edge in the set, but the attributes would be some summary of all the edges in the set. to_spatial_simple() function does this work.

# Let's simplify our network
simple_net <- net %>%
  activate("edges") %>%
  filter(!edge_is_multiple()) %>%
  filter(!edge_is_loop()) 

# Because the difference is not really discernible, we just print out the differences in the edge count.
message(str_c("Before simplification, there were ", net %>% st_as_sf("edges") %>% nrow(), " edges. \n",
            "After simplification, there are ", simple_net %>% st_as_sf("edges") %>% nrow(), " edges."))
## Before simplification, there were 1609 edges. 
## After simplification, there are 1600 edges.

Subdivide edges

When as_sfnetwork() function converts an sf linestrings, the nodes are defined as the endpoints of each linestring. If you zoom into Midtown in the map above, you will see that there are many intersections that do not have nodes, which are errors. We can use to_spatial_subdivision predicate to create points at the intersections and cut the edges accordingly.

To better understand

# Subdivision
subdiv_net <- convert(simple_net, sfnetworks::to_spatial_subdivision)
## Warning: to_spatial_subdivision assumes attributes are constant over geometries
# Add a custom index -> This will be used for visualization purposes later
subdiv_net <- subdiv_net %>% 
  activate("nodes") %>% 
  mutate(custom_id = seq(1, subdiv_net %>% st_as_sf("nodes") %>% nrow()),
         is.new = case_when(is.na(.tidygraph_node_index) ~ "new nodes",
                            TRUE ~ "existing nodes"),
         is.new = factor(is.new)) 

## Plot--
subdiv_pal <- colorFactor(palette = c("yellow", "red"), domain = subdiv_net %>% st_as_sf("nodes") %>% pull(is.new))

subdiv_map <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = subdiv_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
  addCircles(data = subdiv_net %>% st_as_sf('nodes'), fillColor = ~subdiv_pal(is.new), stroke = F, radius = 20, fillOpacity = 0.7) %>% 
  addLegend(position = "bottomright", pal = subdiv_pal, values = subdiv_net %>% st_as_sf("nodes") %>% pull(is.new)) 

subdiv_map

Delete pseudo nodes

smoothed_net <- convert(subdiv_net, sfnetworks::to_spatial_smooth)

# Extract removed points
removed <- setdiff(subdiv_net %>% st_as_sf('nodes') %>% pull(custom_id), 
                   smoothed_net %>% st_as_sf('nodes') %>% pull(custom_id))

smooth_map <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = smoothed_net %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) %>% 
    addCircles(data = subdiv_net %>% st_as_sf("nodes") %>% filter(custom_id %in% removed), 
               fillColor = "red", stroke = F, radius = 15, fillOpacity = 0.8) %>% 
  addControl(html = htmltools::HTML("<b>Red points denote removed nodes</b>"), position = "bottomright")

smooth_map

Calculate centrality

# Calculate centrality measures
network_char <- smoothed_net %>% 
  activate("edges") %>%
  mutate(weight = edge_length()) %>% 
  mutate(edge_bc = centrality_edge_betweenness(weights = weight, directed = F)) %>%
  activate("nodes") %>% 
  mutate(node_bc = centrality_betweenness(weights = weight, directed = F))
## Warning in betweenness(graph = graph, v = V(graph), directed = directed, :
## 'nobigint' is deprecated since igraph 1.3 and will be removed in igraph 1.4
# Edge betweenness
bet_pal_edge <- colorNumeric(palette = "Reds", domain = network_char %>% activate("edges") %>% pull(edge_bc), n = 6)

# Node betweenness
bet_pal_node <- colorNumeric(palette = "Reds", domain = network_char %>% activate("nodes") %>% pull(node_bc), n = 6)

# Map
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>%
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = network_char %>% st_as_sf("edges"), 
               color = ~bet_pal_edge(network_char %>% st_as_sf('edges') %>% pull(edge_bc)), weight = 3, opacity = 0.7) %>% 
  addCircles(data = network_char %>% st_as_sf("nodes"), 
               fillColor = ~bet_pal_node(network_char %>% st_as_sf('nodes') %>% pull(node_bc)), stroke = F, fillOpacity = 0.7, 
             radius = network_char %>% st_as_sf("nodes") %>% with(.$node_bc/(max(.$node_bc)/100))) # denominator is selected to make the max value roughly equal to 100
# Try 2 more other centrality measures.
# Find good ways to calculate the radius argument in addCircles() function for visually pleasing maps for the 2 centrality measures.

Shortest path

# Change crs for convenience
smoothed_net <- smoothed_net %>% st_transform(4326)

# Start point
start_p <- st_point(c(-84.40364459476174,33.776160322717544)) %>% st_sfc(crs = 4326) # CRC at Georgia Tech
# End point
target_p <- st_point(c(-84.37639335217811, 33.75718076235044)) %>% st_sfc(crs = 4326) # MLK National Historical Park
# Get the shortest path
paths = st_network_paths(smoothed_net, from = start_p, to = target_p)
# Extract shortest path
paths_sf <- smoothed_net %>%
  slice(paths$node_paths[[1]])

# Visualize
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = smoothed_net %>% st_as_sf("edges"), color = 'grey', weight = 2, opacity = 0.7) %>% 
  addPolylines(data = paths_sf %>% st_as_sf("edges"), color = "red", weight = 4, opacity = 0.8) %>% 
  addCircles(data = paths_sf %>% st_as_sf("nodes"), stroke = F, fillColor = "red", fillOpacity = 0.8, radius = 50)
## Assignment: sample one point from each tract, calculate average travel time from one census tract to all others. Repeat for all tracts.
## See if there is any patterns to the accessibility.

Extract intersections

end_points <- smoothed_net %>% 
  st_as_sf('nodes') %>% 
  st_join(smoothed_net %>% activate("edges") %>% st_as_sf())

intersections <- end_points %>% 
  group_by(.tidygraph_node_index) %>% 
  summarise(n = n()) %>% 
  filter(n > 1) 

leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = intersections %>% st_as_sf('nodes'), fillColor = 'red', stroke = F, radius = 20, fillOpacity = 0.7) 

Merge it with Census data

Let’s recycle the code for downloading Census data through API from last week.

acs2020c <- acs2020 %>% 
  select(GEOID,
         hhinc = hhincE,
         r_tot = r_totE,
         r_wh = r_whE,
         r_bl = r_blE,
         tot_hh = tot_hhE,
         own_novhc = own_novhcE,
         rent_novhc = rent_novhcE) %>% 
  mutate(pct_wh = r_wh / r_tot,
         pct_bl = r_bl / r_tot,
         pct_novhc = (own_novhc + rent_novhc)/tot_hh) %>% 
  mutate(area1 = unclass(st_area(.))) %>% 
  st_transform(26967) %>% 
  mutate(area2 = unclass(st_area(.))) %>% 
  st_transform(crs = 4326) %>% 
  mutate(ln_pop_den = log((r_tot / (area1/1000^2)) + 1)) %>% 
  filter(!is.na(hhinc), !is.na(r_tot), !is.na(own_novhc))

Spatial join the Census data with our network

# Before we do the join, let's use centrality measures that were calculated on the entire network.
# It takes time, so I calculated it and saved it.
# You can read the data back in
load("D:/Dropbox (GaTech)/Work/Working/School/UA_2022/external/Lab/module_2/network_char.RData")


census_centrality <- acs2020c %>%
  st_join(network_char %>% st_as_sf("nodes") %>% st_transform(crs = 4326)) %>% # When I calculated centrality measures, the CRS was 26967, so reverting it back to 4326
  group_by(GEOID) %>%
  summarise(n = n(),
            hhinc = mean(hhinc, na.rm = T),
            pct_wh = mean(pct_wh, na.rm = T),
            pct_bl = mean(pct_bl, na.rm = T),
            pct_novhc = mean(pct_novhc, na.rm = T),
            node_bc = mean(node_bc, na.rm = T))
tm_shape(acs2020c) + tm_polygons(col = "grey", alpha = 0.5) +
  tm_shape(census_centrality) + tm_polygons(col = "node_bc", style = "quantile") +
  tm_shape(bb_sf) + tm_borders()
census_centrality_plot <- census_centrality %>%
  mutate(hhinc = log(hhinc),
         pct_novhc = log(pct_novhc + 0.02)) %>%
  pivot_longer(cols = c('hhinc', 'pct_wh', 'pct_bl', 'pct_novhc'), names_to = "variable", values_to = "value") %>% 
  mutate(variable = factor(variable, labels = c("Household Income (logged)", "% Black", "% No HH with no cars (logged)", "% White")))

centrality_plot <- census_centrality_plot %>%
  ggplot() +
  geom_point(aes(x = node_bc, y = value), alpha = 0.2) +
  facet_wrap(~variable, scales = "free_y") +
  labs(x = "Centrality", title = "Centrality VS. Socio-demographics") +
  theme_bw()

centrality_plot + ggpubr::stat_cor(aes(x = node_bc, y = value))
## Warning: Removed 896 rows containing non-finite values (stat_cor).
## Warning: Removed 896 rows containing missing values (geom_point).

ggplotly(centrality_plot)